In [916]:
# Importar pandas
import pandas as pd

# Cargar el archivo CSV local con punto y coma como delimitador
file_path = "fallecidos_covid.csv"
covid_data = pd.read_csv(file_path, delimiter=';')

# Mostrar información sobre el DataFrame cargado
covid_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220918 entries, 0 to 220917
Data columns (total 10 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   FECHA_CORTE          220918 non-null  int64  
 1   FECHA_FALLECIMIENTO  220918 non-null  int64  
 2   EDAD_DECLARADA       220918 non-null  int64  
 3   SEXO                 220918 non-null  object 
 4   CLASIFICACION_DEF    220918 non-null  object 
 5   DEPARTAMENTO         220918 non-null  object 
 6   PROVINCIA            220913 non-null  object 
 7   DISTRITO             220913 non-null  object 
 8   UBIGEO               220913 non-null  float64
 9   UUID                 218449 non-null  float64
dtypes: float64(2), int64(3), object(5)
memory usage: 16.9+ MB
In [918]:
covid_data.head()
Out[918]:
FECHA_CORTE FECHA_FALLECIMIENTO EDAD_DECLARADA SEXO CLASIFICACION_DEF DEPARTAMENTO PROVINCIA DISTRITO UBIGEO UUID
0 20240317 20220219 63 MASCULINO Criterio virológico TUMBES TUMBES TUMBES 240101.0 203506.0
1 20240317 20210529 74 MASCULINO Criterio virológico TUMBES TUMBES TUMBES 240101.0 203532.0
2 20240317 20210623 72 FEMENINO Criterio SINADEF TACNA TACNA ALTO DE LA ALIANZA 230102.0 203584.0
3 20240317 20210824 85 MASCULINO Criterio investigación Epidemiológica TUMBES TUMBES TUMBES 240101.0 212541.0
4 20240317 20210627 46 MASCULINO Criterio virológico TACNA TACNA ALTO DE LA ALIANZA 230102.0 203665.0
In [920]:
covid_data.FECHA_FALLECIMIENTO.value_counts()
Out[920]:
FECHA_FALLECIMIENTO
20210409    828
20210420    822
20210413    821
20210419    820
20210416    820
           ... 
20230909      1
20231129      1
20230919      1
20240227      1
20231214      1
Name: count, Length: 1397, dtype: int64
In [922]:
# Asegúrate de que la columna 'FECHA_RESULTADO' existe y convierte 'AÑO_RESULTADO' extrayendo el año
covid_data['AÑO_FALLECIMIENTO'] = pd.to_numeric(covid_data['FECHA_FALLECIMIENTO'].astype(str).str[:4], errors='coerce')


# Contabilizar los casos por año en el DataFrame filtrado
casos_por_año_filtrado = covid_data['AÑO_FALLECIMIENTO'].value_counts().sort_index()

# Mostrar el resultado
print(casos_por_año_filtrado)
AÑO_FALLECIMIENTO
2020     95331
2021    107962
2022     14742
2023      2570
2024       313
Name: count, dtype: int64
In [923]:
covid_data.CLASIFICACION_DEF.value_counts()
Out[923]:
CLASIFICACION_DEF
Criterio virológico                      84557
Criterio SINADEF                         66198
Criterio serológico                      42910
Criterio investigación Epidemiológica    10783
Criterio clínico                          8994
Criterio radiológico                      5113
Criterio nexo epidemiológico              2215
NotiCovid                                  132
sinadef                                     16
Name: count, dtype: int64

Criterio virológico: Muerte en un caso confirmado de COVID-19 que fallece en los 60 días posteriores a una prueba molecular (PCR) o antigénica reactiva para SARS-CoV-2. Criterio serológico: Muerte en un caso confirmado de COVID-19 que fallece en los 60 días posteriores a una prueba serológica positiva IgM o IgM/IgG para SARS-CoV-2. Criterio radiológico: Muerte en un caso probable de COVID-19 que presenta una imagen radiológica, tomográfica o de resonancia magnética nuclear compatible con neumonía COVID-1 Criterio nexo epidemiológico: Muerte en un caso probable de COVID-19 que presenta nexo epidemiológico con un caso confirmado de COVI Criterio investigación epidemiológica: Muerte en un caso sospechoso de COVID-19 que es verificado por investigación epidemiológica de la Red Nacional de Epidemiología (RE · Criterio clínico: Muerte en un caso sospechoso de COVID-19 que presenta cuadro clínico compatible con la en C · Criterio SINADEF Muerte con certificado de defunción en el que se presenta el diagnóstico de COVID-19 como causa de la muerte. El fallecimiento por COVID-19 en el certificado de defunción está definido por la presencia en los campos A, B, C o D de los códigos CIE-10: U071, U072, B342, B972, o la mención de los términos “coronavirus”, “cov-2”, “cov2”, “covid” y “sars”.

In [927]:
# Unificar todas las variantes de "sinadef" en "Criterio SINADEF"
covid_data['CLASIFICACION_DEF'] = covid_data['CLASIFICACION_DEF'].replace(
    ['sinadef', 'sinadef', 'criterio sinadef', 'Criterio sinadef', 'Criterio SINADEF'], 'Criterio SINADEF'
)

# Verificar el conteo nuevamente para ver los cambios
covid_data['CLASIFICACION_DEF'].value_counts()
Out[927]:
CLASIFICACION_DEF
Criterio virológico                      84557
Criterio SINADEF                         66214
Criterio serológico                      42910
Criterio investigación Epidemiológica    10783
Criterio clínico                          8994
Criterio radiológico                      5113
Criterio nexo epidemiológico              2215
NotiCovid                                  132
Name: count, dtype: int64
In [929]:
# Filtrar los datos para excluir la categoría 'NotiCovid' en la columna CLASIFICACION_DEF
covid_criterios = covid_data[covid_data.CLASIFICACION_DEF != 'NotiCovid']

# Mostrar las primeras filas del nuevo DataFrame
covid_criterios.head()
Out[929]:
FECHA_CORTE FECHA_FALLECIMIENTO EDAD_DECLARADA SEXO CLASIFICACION_DEF DEPARTAMENTO PROVINCIA DISTRITO UBIGEO UUID AÑO_FALLECIMIENTO
0 20240317 20220219 63 MASCULINO Criterio virológico TUMBES TUMBES TUMBES 240101.0 203506.0 2022
1 20240317 20210529 74 MASCULINO Criterio virológico TUMBES TUMBES TUMBES 240101.0 203532.0 2021
2 20240317 20210623 72 FEMENINO Criterio SINADEF TACNA TACNA ALTO DE LA ALIANZA 230102.0 203584.0 2021
3 20240317 20210824 85 MASCULINO Criterio investigación Epidemiológica TUMBES TUMBES TUMBES 240101.0 212541.0 2021
4 20240317 20210627 46 MASCULINO Criterio virológico TACNA TACNA ALTO DE LA ALIANZA 230102.0 203665.0 2021
In [931]:
indexList=['AÑO_FALLECIMIENTO','DEPARTAMENTO','PROVINCIA','CLASIFICACION_DEF']
aggregator={'CLASIFICACION_DEF':[len]}
covid_provYear=covid_criterios.groupby(indexList,observed=True).agg(aggregator)
covid_provYear
Out[931]:
CLASIFICACION_DEF
len
AÑO_FALLECIMIENTO DEPARTAMENTO PROVINCIA CLASIFICACION_DEF
2020 AMAZONAS BAGUA Criterio SINADEF 39
Criterio clínico 13
Criterio investigación Epidemiológica 58
Criterio nexo epidemiológico 19
Criterio radiológico 6
... ... ... ... ...
2024 PIURA SULLANA Criterio virológico 1
TALARA Criterio virológico 1
PUNO PUNO Criterio virológico 1
SAN ROMAN Criterio virológico 2
TACNA TACNA Criterio SINADEF 1

2869 rows × 1 columns

In [933]:
covidDraft=covid_provYear.unstack(3).fillna(0) #leftmost index in rows
covidDraft
Out[933]:
CLASIFICACION_DEF
len
CLASIFICACION_DEF Criterio SINADEF Criterio clínico Criterio investigación Epidemiológica Criterio nexo epidemiológico Criterio radiológico Criterio serológico Criterio virológico
AÑO_FALLECIMIENTO DEPARTAMENTO PROVINCIA
2020 AMAZONAS BAGUA 39.0 13.0 58.0 19.0 6.0 109.0 26.0
BONGARA 3.0 1.0 1.0 1.0 0.0 15.0 4.0
CHACHAPOYAS 16.0 3.0 1.0 0.0 4.0 30.0 7.0
CONDORCANQUI 17.0 17.0 24.0 3.0 0.0 18.0 0.0
LUYA 6.0 1.0 2.0 1.0 1.0 9.0 0.0
... ... ... ... ... ... ... ... ... ...
2024 PIURA SULLANA 3.0 0.0 0.0 0.0 0.0 0.0 1.0
TALARA 0.0 0.0 0.0 0.0 0.0 0.0 1.0
PUNO PUNO 0.0 0.0 0.0 0.0 0.0 0.0 1.0
SAN ROMAN 0.0 0.0 0.0 0.0 0.0 0.0 2.0
TACNA TACNA 1.0 0.0 0.0 0.0 0.0 0.0 0.0

768 rows × 7 columns

In [935]:
covidDraft['CRITERIO_pct']=covidDraft.iloc[:,1]/(covidDraft.iloc[:,0] + covidDraft.iloc[:,1])
covid_provYear_Alarm_w=covidDraft['CRITERIO_pct'].unstack('AÑO_FALLECIMIENTO').fillna(0)
covid_provYear_Alarm_w
Out[935]:
AÑO_FALLECIMIENTO 2020 2021 2022 2023 2024
DEPARTAMENTO PROVINCIA
AMAZONAS BAGUA 0.250000 0.153846 0.052632 0.0 0.0
BONGARA 0.250000 0.166667 0.000000 0.0 0.0
CHACHAPOYAS 0.157895 0.161290 0.285714 0.0 0.0
CONDORCANQUI 0.500000 0.250000 0.000000 0.0 0.0
LUYA 0.142857 0.153846 0.000000 0.0 0.0
... ... ... ... ... ... ...
TUMBES ZARUMILLA 0.306122 0.156250 0.000000 0.0 0.0
UCAYALI ATALAYA 0.562500 0.210526 0.000000 0.0 0.0
CORONEL PORTILLO 0.283002 0.374046 0.000000 0.0 0.0
PADRE ABAD 0.441860 0.388889 0.333333 0.0 0.0
PURUS 0.000000 0.000000 0.000000 0.0 0.0

196 rows × 5 columns

In [937]:
covid_provYear_Alarm_w.columns
Out[937]:
Index([2020, 2021, 2022, 2023, 2024], dtype='int64', name='AÑO_FALLECIMIENTO')
In [939]:
covid_provYear_Alarm_w.columns=['Año'+str(x) for x in covid_provYear_Alarm_w.columns]
In [941]:
covid_provYear_Alarm_w
Out[941]:
Año2020 Año2021 Año2022 Año2023 Año2024
DEPARTAMENTO PROVINCIA
AMAZONAS BAGUA 0.250000 0.153846 0.052632 0.0 0.0
BONGARA 0.250000 0.166667 0.000000 0.0 0.0
CHACHAPOYAS 0.157895 0.161290 0.285714 0.0 0.0
CONDORCANQUI 0.500000 0.250000 0.000000 0.0 0.0
LUYA 0.142857 0.153846 0.000000 0.0 0.0
... ... ... ... ... ... ...
TUMBES ZARUMILLA 0.306122 0.156250 0.000000 0.0 0.0
UCAYALI ATALAYA 0.562500 0.210526 0.000000 0.0 0.0
CORONEL PORTILLO 0.283002 0.374046 0.000000 0.0 0.0
PADRE ABAD 0.441860 0.388889 0.333333 0.0 0.0
PURUS 0.000000 0.000000 0.000000 0.0 0.0

196 rows × 5 columns

In [943]:
covid_provYear_Alarm_w.reset_index(inplace=True)
covid_provYear_Alarm_w
Out[943]:
DEPARTAMENTO PROVINCIA Año2020 Año2021 Año2022 Año2023 Año2024
0 AMAZONAS BAGUA 0.250000 0.153846 0.052632 0.0 0.0
1 AMAZONAS BONGARA 0.250000 0.166667 0.000000 0.0 0.0
2 AMAZONAS CHACHAPOYAS 0.157895 0.161290 0.285714 0.0 0.0
3 AMAZONAS CONDORCANQUI 0.500000 0.250000 0.000000 0.0 0.0
4 AMAZONAS LUYA 0.142857 0.153846 0.000000 0.0 0.0
... ... ... ... ... ... ... ...
191 TUMBES ZARUMILLA 0.306122 0.156250 0.000000 0.0 0.0
192 UCAYALI ATALAYA 0.562500 0.210526 0.000000 0.0 0.0
193 UCAYALI CORONEL PORTILLO 0.283002 0.374046 0.000000 0.0 0.0
194 UCAYALI PADRE ABAD 0.441860 0.388889 0.333333 0.0 0.0
195 UCAYALI PURUS 0.000000 0.000000 0.000000 0.0 0.0

196 rows × 7 columns

In [945]:
mapLink='https://github.com/SocialAnalytics-StrategicIntelligence/GeoDF_Analytics/raw/main/maps/ProvsINEI2023.zip'

import geopandas as gpd

provmap=gpd.read_file(mapLink)

provmap.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 196 entries, 0 to 195
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   OBJECTID    196 non-null    float64 
 1   CCDD        196 non-null    object  
 2   CCPP        196 non-null    object  
 3   DEPARTAMEN  196 non-null    object  
 4   PROVINCIA   196 non-null    object  
 5   geometry    196 non-null    geometry
dtypes: float64(1), geometry(1), object(4)
memory usage: 9.3+ KB
In [946]:
provmap['location']=['+'.join(x[0]) for x in zip(provmap.iloc[:,3:5].values)]
provmap.head(10)
Out[946]:
OBJECTID CCDD CCPP DEPARTAMEN PROVINCIA geometry location
0 1.0 01 01 AMAZONAS CHACHAPOYAS POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... AMAZONAS+CHACHAPOYAS
1 2.0 01 02 AMAZONAS BAGUA POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... AMAZONAS+BAGUA
2 3.0 01 03 AMAZONAS BONGARA POLYGON ((-77.72759 -5.14030, -77.72361 -5.140... AMAZONAS+BONGARA
3 4.0 01 04 AMAZONAS CONDORCANQUI POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... AMAZONAS+CONDORCANQUI
4 5.0 01 05 AMAZONAS LUYA POLYGON ((-78.13023 -5.90370, -78.13011 -5.904... AMAZONAS+LUYA
5 6.0 01 06 AMAZONAS RODRIGUEZ DE MENDOZA POLYGON ((-77.44452 -6.05002, -77.44387 -6.050... AMAZONAS+RODRIGUEZ DE MENDOZA
6 7.0 01 07 AMAZONAS UTCUBAMBA POLYGON ((-78.09288 -5.36258, -78.09288 -5.364... AMAZONAS+UTCUBAMBA
7 8.0 02 01 ANCASH HUARAZ POLYGON ((-77.39870 -9.35563, -77.39852 -9.356... ANCASH+HUARAZ
8 9.0 02 02 ANCASH AIJA POLYGON ((-77.61368 -9.64900, -77.61241 -9.649... ANCASH+AIJA
9 10.0 02 03 ANCASH ANTONIO RAYMONDI POLYGON ((-77.08856 -8.97496, -77.08804 -8.975... ANCASH+ANTONIO RAYMONDI
In [947]:
covid_provYear_Alarm_w['location']=['+'.join(x[0]) for x in zip(covid_provYear_Alarm_w.iloc[:,:2].values)]
covid_provYear_Alarm_w.head()
Out[947]:
DEPARTAMENTO PROVINCIA Año2020 Año2021 Año2022 Año2023 Año2024 location
0 AMAZONAS BAGUA 0.250000 0.153846 0.052632 0.0 0.0 AMAZONAS+BAGUA
1 AMAZONAS BONGARA 0.250000 0.166667 0.000000 0.0 0.0 AMAZONAS+BONGARA
2 AMAZONAS CHACHAPOYAS 0.157895 0.161290 0.285714 0.0 0.0 AMAZONAS+CHACHAPOYAS
3 AMAZONAS CONDORCANQUI 0.500000 0.250000 0.000000 0.0 0.0 AMAZONAS+CONDORCANQUI
4 AMAZONAS LUYA 0.142857 0.153846 0.000000 0.0 0.0 AMAZONAS+LUYA
In [951]:
import unidecode


byePunctuation=lambda x: unidecode.unidecode(x)
covid_provYear_Alarm_w['location']=covid_provYear_Alarm_w['location'].apply(byePunctuation)
provmap['location']=provmap['location'].apply(byePunctuation)
In [953]:
# replacing dashes and multiple spaces by a simple space
covid_provYear_Alarm_w['location']=covid_provYear_Alarm_w.location.str.replace("\-|\_|\s+","",regex=True)
provmap['location']=provmap.location.str.replace("\-|\_|\s+","",regex=True)
<>:2: SyntaxWarning: invalid escape sequence '\-'
<>:3: SyntaxWarning: invalid escape sequence '\-'
<>:2: SyntaxWarning: invalid escape sequence '\-'
<>:3: SyntaxWarning: invalid escape sequence '\-'
C:\Users\Sistema\AppData\Local\Temp\ipykernel_51764\1873358278.py:2: SyntaxWarning: invalid escape sequence '\-'
  covid_provYear_Alarm_w['location']=covid_provYear_Alarm_w.location.str.replace("\-|\_|\s+","",regex=True)
C:\Users\Sistema\AppData\Local\Temp\ipykernel_51764\1873358278.py:3: SyntaxWarning: invalid escape sequence '\-'
  provmap['location']=provmap.location.str.replace("\-|\_|\s+","",regex=True)
In [955]:
nomatch_df=set(covid_provYear_Alarm_w.location)- set(provmap.location)
nomatch_gdf=set(provmap.location)-set(covid_provYear_Alarm_w.location)
In [957]:
len(nomatch_df), len(nomatch_gdf)
Out[957]:
(2, 2)
In [959]:
from thefuzz import process
[(dis,process.extractOne(dis,nomatch_gdf)) for dis in sorted(nomatch_df)]
Out[959]:
[('ANCASH+ANTONIORAIMONDI', ('ANCASH+ANTONIORAYMONDI', 95)),
 ('ICA+NAZCA', ('ICA+NASCA', 89))]
In [961]:
{dis:process.extractOne(dis,nomatch_gdf)[0] for dis in sorted(nomatch_df)}
Out[961]:
{'ANCASH+ANTONIORAIMONDI': 'ANCASH+ANTONIORAYMONDI', 'ICA+NAZCA': 'ICA+NASCA'}
In [963]:
changesinDF={dis:process.extractOne(dis,nomatch_gdf)[0] for dis in sorted(nomatch_df)}
In [965]:
covid_provYear_Alarm_w.replace({'location': changesinDF}, inplace=True)
In [967]:
nomatch_df=set(covid_provYear_Alarm_w.location)- set(provmap.location)
nomatch_gdf=set(provmap.location)-set(covid_provYear_Alarm_w.location)

[(dis,process.extractOne(dis,nomatch_gdf)) for dis in sorted(nomatch_df)]
Out[967]:
[]
In [969]:
# Ahora puedes hacer el merge sin problemas, usando un nombre diferente para el indicador
covid_provYear_Alarm_map = provmap_df.merge(covid_provYear_Alarm_w, on='location', how='left', indicator='merge_flag')

# Revisar la información después del merge
print(covid_provYear_Alarm_map.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 196 entries, 0 to 195
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   OBJECTID      196 non-null    float64 
 1   CCDD          196 non-null    object  
 2   CCPP          196 non-null    object  
 3   DEPARTAMEN    196 non-null    object  
 4   PROVINCIA_x   196 non-null    object  
 5   location      196 non-null    object  
 6   DEPARTAMENTO  196 non-null    object  
 7   PROVINCIA_y   196 non-null    object  
 8   Año2020       196 non-null    float64 
 9   Año2021       196 non-null    float64 
 10  Año2022       196 non-null    float64 
 11  Año2023       196 non-null    float64 
 12  Año2024       196 non-null    float64 
 13  merge_flag    196 non-null    category
dtypes: category(1), float64(6), object(7)
memory usage: 20.4+ KB
None
In [971]:
import geopandas as gpd

# Asegurarte de que provmap es un GeoDataFrame
provmap = gpd.GeoDataFrame(provmap, geometry='geometry')

# Realizar el merge para agregar la columna de geometría
covid_provYear_Alarm_map = covid_provYear_Alarm_map.merge(provmap[['location', 'geometry']], on='location', how='left')

# Convertir a GeoDataFrame después del merge
covid_provYear_Alarm_map = gpd.GeoDataFrame(covid_provYear_Alarm_map, geometry='geometry')
In [973]:
print(covid_provYear_Alarm_map.head())
   OBJECTID CCDD CCPP DEPARTAMEN   PROVINCIA_x               location  \
0       1.0   01   01   AMAZONAS   CHACHAPOYAS   AMAZONAS+CHACHAPOYAS   
1       2.0   01   02   AMAZONAS         BAGUA         AMAZONAS+BAGUA   
2       3.0   01   03   AMAZONAS       BONGARA       AMAZONAS+BONGARA   
3       4.0   01   04   AMAZONAS  CONDORCANQUI  AMAZONAS+CONDORCANQUI   
4       5.0   01   05   AMAZONAS          LUYA          AMAZONAS+LUYA   

  DEPARTAMENTO   PROVINCIA_y   Año2020   Año2021   Año2022  Año2023  Año2024  \
0     AMAZONAS   CHACHAPOYAS  0.157895  0.161290  0.285714      0.0      0.0   
1     AMAZONAS         BAGUA  0.250000  0.153846  0.052632      0.0      0.0   
2     AMAZONAS       BONGARA  0.250000  0.166667  0.000000      0.0      0.0   
3     AMAZONAS  CONDORCANQUI  0.500000  0.250000  0.000000      0.0      0.0   
4     AMAZONAS          LUYA  0.142857  0.153846  0.000000      0.0      0.0   

  merge_flag                                           geometry  
0       both  POLYGON ((-77.72614 -5.94354, -77.72486 -5.943...  
1       both  POLYGON ((-78.61909 -4.51001, -78.61802 -4.510...  
2       both  POLYGON ((-77.72759 -5.14030, -77.72361 -5.140...  
3       both  POLYGON ((-77.81399 -2.99278, -77.81483 -2.995...  
4       both  POLYGON ((-78.13023 -5.90370, -78.13011 -5.904...  
In [975]:
# Convertir la columna 'merge_flag' a tipo string si existe
covid_provYear_Alarm_map['merge_flag'] = covid_provYear_Alarm_map['merge_flag'].astype(str)
In [977]:
# Crear la nueva columna 'PROVINCIA' que toma valores de 'PROVINCIA_x' y completa con 'PROVINCIA_y' si hay valores nulos
covid_provYear_Alarm_map['PROVINCIA'] = covid_provYear_Alarm_map['PROVINCIA_x'].combine_first(covid_provYear_Alarm_map['PROVINCIA_y'])

# Eliminar las columnas 'PROVINCIA_x' y 'PROVINCIA_y'
covid_provYear_Alarm_map.drop(columns=['PROVINCIA_x', 'PROVINCIA_y'], inplace=True)

# Verificar el resultado
print(covid_provYear_Alarm_map.head())
   OBJECTID CCDD CCPP DEPARTAMEN               location DEPARTAMENTO  \
0       1.0   01   01   AMAZONAS   AMAZONAS+CHACHAPOYAS     AMAZONAS   
1       2.0   01   02   AMAZONAS         AMAZONAS+BAGUA     AMAZONAS   
2       3.0   01   03   AMAZONAS       AMAZONAS+BONGARA     AMAZONAS   
3       4.0   01   04   AMAZONAS  AMAZONAS+CONDORCANQUI     AMAZONAS   
4       5.0   01   05   AMAZONAS          AMAZONAS+LUYA     AMAZONAS   

    Año2020   Año2021   Año2022  Año2023  Año2024 merge_flag  \
0  0.157895  0.161290  0.285714      0.0      0.0       both   
1  0.250000  0.153846  0.052632      0.0      0.0       both   
2  0.250000  0.166667  0.000000      0.0      0.0       both   
3  0.500000  0.250000  0.000000      0.0      0.0       both   
4  0.142857  0.153846  0.000000      0.0      0.0       both   

                                            geometry     PROVINCIA  
0  POLYGON ((-77.72614 -5.94354, -77.72486 -5.943...   CHACHAPOYAS  
1  POLYGON ((-78.61909 -4.51001, -78.61802 -4.510...         BAGUA  
2  POLYGON ((-77.72759 -5.14030, -77.72361 -5.140...       BONGARA  
3  POLYGON ((-77.81399 -2.99278, -77.81483 -2.995...  CONDORCANQUI  
4  POLYGON ((-78.13023 -5.90370, -78.13011 -5.904...          LUYA  
In [979]:
print(covid_provYear_Alarm_map.info())
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 196 entries, 0 to 195
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   OBJECTID      196 non-null    float64 
 1   CCDD          196 non-null    object  
 2   CCPP          196 non-null    object  
 3   DEPARTAMEN    196 non-null    object  
 4   location      196 non-null    object  
 5   DEPARTAMENTO  196 non-null    object  
 6   Año2020       196 non-null    float64 
 7   Año2021       196 non-null    float64 
 8   Año2022       196 non-null    float64 
 9   Año2023       196 non-null    float64 
 10  Año2024       196 non-null    float64 
 11  merge_flag    196 non-null    object  
 12  geometry      196 non-null    geometry
 13  PROVINCIA     196 non-null    object  
dtypes: float64(6), geometry(1), object(7)
memory usage: 21.6+ KB
None
In [981]:
# Crear la lista de columnas que deseas eliminar
bye = ['merge_flag', 'CCPP', 'CCDD', 'DEPARTAMENTO']

# Filtrar las columnas para incluir solo aquellas que existen en el DataFrame
bye = [col for col in bye if col in covid_provYear_Alarm_map.columns]

# Eliminar las columnas especificadas
covid_provYear_Alarm_map.drop(columns=bye, inplace=True)

# Visualizar las primeras filas para confirmar
covid_provYear_Alarm_map.head()
Out[981]:
OBJECTID DEPARTAMEN location Año2020 Año2021 Año2022 Año2023 Año2024 geometry PROVINCIA
0 1.0 AMAZONAS AMAZONAS+CHACHAPOYAS 0.157895 0.161290 0.285714 0.0 0.0 POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... CHACHAPOYAS
1 2.0 AMAZONAS AMAZONAS+BAGUA 0.250000 0.153846 0.052632 0.0 0.0 POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... BAGUA
2 3.0 AMAZONAS AMAZONAS+BONGARA 0.250000 0.166667 0.000000 0.0 0.0 POLYGON ((-77.72759 -5.14030, -77.72361 -5.140... BONGARA
3 4.0 AMAZONAS AMAZONAS+CONDORCANQUI 0.500000 0.250000 0.000000 0.0 0.0 POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... CONDORCANQUI
4 5.0 AMAZONAS AMAZONAS+LUYA 0.142857 0.153846 0.000000 0.0 0.0 POLYGON ((-78.13023 -5.90370, -78.13011 -5.904... LUYA
In [983]:
covid_provYear_Alarm_map.fillna(0,inplace=True)
In [985]:
# Ruta del archivo .shp (ajusta la ruta si es necesario)
shp_file_path = r"C:\Users\Sistema\OneDrive\Documentos\PUCP\2024-2\Herramientas cuantitativas\Tarea 4\GeoData Covid\GeoData-Covid-\maps\gadm41_PER_2.shp"

# Cargar el archivo Shapefile
provincias_peru = gpd.read_file(shp_file_path)

# Convertir multipartes a partes individuales
provincias_peru = provincias_peru.explode(index_parts=False)

# Mostrar las primeras filas para verificar la carga
print(provincias_peru.head())
       GID_2 GID_0 COUNTRY    GID_1    NAME_1 NL_NAME_1        NAME_2  \
0  PER.1.1_1   PER    Peru  PER.1_1  Amazonas        NA         Bagua   
0  PER.1.1_1   PER    Peru  PER.1_1  Amazonas        NA         Bagua   
1  PER.1.2_1   PER    Peru  PER.1_1  Amazonas        NA       Bongará   
2  PER.1.3_1   PER    Peru  PER.1_1  Amazonas        NA   Chachapoyas   
3  PER.1.4_1   PER    Peru  PER.1_1  Amazonas        NA  Condorcanqui   

  VARNAME_2 NL_NAME_2     TYPE_2 ENGTYPE_2 CC_2    HASC_2  \
0        NA        NA  Provincia  Province   NA  PE.AM.BG   
0        NA        NA  Provincia  Province   NA  PE.AM.BG   
1   Bongart        NA  Provincia  Province   NA  PE.AM.BN   
2        NA        NA  Provincia  Province   NA  PE.AM.CP   
3        NA        NA  Provincia  Province   NA  PE.AM.CQ   

                                            geometry  
0  POLYGON ((-78.29144 -5.55342, -78.28999 -5.556...  
0  POLYGON ((-78.63635 -4.49835, -78.63632 -4.498...  
1  POLYGON ((-77.76215 -5.81584, -77.76093 -5.816...  
2  POLYGON ((-77.78199 -6.94651, -77.78417 -6.950...  
3  POLYGON ((-78.10184 -5.34889, -78.10178 -5.348...  
In [986]:
provincias_peru.to_file("provinciasPeru.gpkg", layer='provincias', driver="GPKG")
In [989]:
import os

os.makedirs('maps', exist_ok=True)
covid_provYear_Alarm_map.to_file(os.path.join('maps', "provinciasPeru.gpkg"), layer='provinciasCovid', driver="GPKG")
In [1056]:
covid_provYear_Alarm_map.Año2020.describe()
Out[1056]:
count    196.000000
mean       0.206520
std        0.164606
min        0.000000
25%        0.098810
50%        0.182458
75%        0.285714
max        1.000000
Name: Año2020, dtype: float64
In [1054]:
import seaborn as sea

sea.boxplot(covid_provYear_Alarm_map.Año2020, color='skyblue',orient='h')
Out[1054]:
<Axes: xlabel='Año2020'>
No description has been provided for this image
In [1058]:
from sklearn.preprocessing import QuantileTransformer
qt = QuantileTransformer(n_quantiles=100, random_state=0,output_distribution='normal')
qt_result=qt.fit_transform(covid_provYear_Alarm_map[['Año2020']])
sea.boxplot(qt_result, color='skyblue',orient='h')
Out[1058]:
<Axes: >
No description has been provided for this image
In [1060]:
covid_provYear_Alarm_map['Año_2020_qt']=qt_result
In [1062]:
from libpysal.weights import Queen, Rook, KNN

# rook

w_rook = Rook.from_dataframe(covid_provYear_Alarm_map,use_index=False)
In [1063]:
w_queen = Queen.from_dataframe(covid_provYear_Alarm_map,use_index=False)
In [1076]:
# Filtrar los datos de Lima
covid_provYear_Alarm_map_lima = covid_provYear_Alarm_map[covid_provYear_Alarm_map['DEPARTAMEN'] == 'LIMA']

# Verificar los datos de Lima
print(covid_provYear_Alarm_map_lima.head())
     OBJECTID DEPARTAMEN        location   Año2020   Año2021   Año2022  \
127     128.0       LIMA       LIMA+LIMA  0.093098  0.049750  0.037594   
128     129.0       LIMA   LIMA+BARRANCA  0.201754  0.083333  0.000000   
129     130.0       LIMA  LIMA+CAJATAMBO  0.941176  0.000000  0.000000   
130     131.0       LIMA      LIMA+CANTA  0.083333  0.083333  0.000000   
131     132.0       LIMA     LIMA+CANETE  0.235119  0.051429  0.034483   

      Año2023  Año2024                                           geometry  \
127  0.027027      0.0  MULTIPOLYGON (((-77.06517 -11.57512, -77.06505...   
128  0.000000      0.0  POLYGON ((-77.73536 -10.32351, -77.73407 -10.3...   
129  0.000000      0.0  POLYGON ((-76.90483 -10.27726, -76.90451 -10.2...   
130  0.000000      0.0  POLYGON ((-76.56497 -11.31924, -76.56074 -11.3...   
131  0.000000      0.0  POLYGON ((-76.55171 -12.28024, -76.55021 -12.2...   

     PROVINCIA  Año_2021_qt  Año_2020_qt  
127       LIMA    -0.772468    -0.732259  
128   BARRANCA    -0.321971     0.143629  
129  CAJATAMBO    -5.199338     2.843409  
130      CANTA    -0.321971    -0.834427  
131     CAÑETE    -0.709378     0.267958  
In [1078]:
from libpysal.weights import Queen, Rook, KNN

# Calcular la vecindad espacial con el método Queen para los datos de Lima
w_queen_lima = Queen.from_dataframe(covid_provYear_Alarm_map_lima, use_index=False)

# Calcular la vecindad espacial con el método Rook para los datos de Lima
w_rook_lima = Rook.from_dataframe(covid_provYear_Alarm_map_lima, use_index=False)

# Calcular la vecindad espacial usando K-Nearest Neighbors (por ejemplo, k=8) para los datos de Lima
w_knn_lima = KNN.from_dataframe(covid_provYear_Alarm_map_lima, k=8)

# Verificar los resultados de los pesos de vecindad
print(w_queen_lima)
print(w_rook_lima)
print(w_knn_lima)
<libpysal.weights.contiguity.Queen object at 0x0000017757415730>
<libpysal.weights.contiguity.Rook object at 0x000001774FD463C0>
<libpysal.weights.distance.KNN object at 0x0000017735C99E50>
In [1080]:
# Seleccionar la provincia de Lima específica para el análisis
base = covid_provYear_Alarm_map_lima[covid_provYear_Alarm_map_lima.PROVINCIA == "LIMA"].plot()

# Graficar los vecinos usando el índice de vecindad de 'Rook' para la primera provincia en el conjunto de Lima
covid_provYear_Alarm_map_lima.iloc[w_rook_lima.neighbors[0],].plot(ax=base, facecolor="yellow", edgecolor='k')

# Graficar la provincia seleccionada en rojo
covid_provYear_Alarm_map_lima.head(1).plot(ax=base, facecolor="red")
Out[1080]:
<Axes: >
No description has been provided for this image
In [1082]:
w_queen.neighbors
Out[1082]:
{0: [2, 114, 4, 5, 180, 182, 55],
 1: [3, 60, 61, 6],
 2: [0, 143, 3, 4, 6, 182, 175],
 3: [1, 2, 6, 143],
 4: [0, 2, 6, 55, 56],
 5: [0, 178, 180, 182, 175],
 6: [1, 2, 3, 4, 56, 58, 60],
 7: [16, 17, 23, 8, 26, 12, 14],
 8: [17, 7, 23],
 9: [16, 90, 13],
 10: [16, 26, 12, 13],
 11: [96, 129, 128, 16, 17, 20, 23, 89, 91],
 12: [16, 26, 10, 7],
 13: [16, 19, 26, 90, 9, 10],
 14: [24, 17, 26, 7],
 15: [24, 25, 18, 21],
 16: [7, 9, 10, 11, 12, 13, 23, 90, 91],
 17: [128, 23, 7, 8, 11, 14],
 18: [22, 24, 25, 26, 15],
 19: [13, 26, 22, 90, 93],
 20: [128, 129, 11, 134],
 21: [119, 24, 121, 123, 25, 15],
 22: [18, 19, 119, 25, 26, 93],
 23: [16, 17, 7, 8, 11],
 24: [18, 21, 121, 26, 123, 14, 15],
 25: [18, 21, 22, 119, 15],
 26: [7, 10, 12, 13, 14, 18, 19, 22, 24],
 27: [33, 69, 75, 28, 29, 30, 31],
 28: [32, 75, 46, 47, 48, 50, 52, 27, 30],
 29: [48, 33, 73, 27, 31, 30, 41],
 30: [48, 27, 28, 29],
 31: [33, 69, 73, 27, 76, 29],
 32: [42, 52, 28, 46],
 33: [27, 29, 31],
 34: [35, 149, 38, 40, 172],
 35: [34, 36, 37, 38, 39, 40],
 36: [48, 49, 35, 100, 39, 41, 47],
 37: [35, 38, 39],
 38: [34, 35, 37, 39, 168, 73, 74, 172],
 39: [35, 36, 37, 38, 73, 74, 41],
 40: [34, 35, 148, 149, 150],
 41: [48, 49, 36, 39, 73, 29],
 42: [32, 82, 52, 85, 43, 45, 46],
 43: [42, 51, 52, 85],
 44: [51, 85, 47],
 45: [42, 75, 108, 46, 81, 82, 84, 86],
 46: [32, 42, 75, 28, 45],
 47: [36, 100, 101, 44, 48, 50, 51, 85, 28],
 48: [49, 36, 41, 28, 29, 30, 47],
 49: [48, 41, 36],
 50: [51, 52, 28, 47],
 51: [50, 52, 85, 43, 44, 47],
 52: [32, 50, 51, 42, 43, 28],
 53: [64, 54, 55, 57, 122, 59, 62],
 54: [114, 53, 117, 120, 122, 62],
 55: [0, 114, 4, 53, 56, 59, 62],
 56: [65, 4, 6, 55, 58, 59, 124, 125],
 57: [64, 113, 115, 53, 118, 122, 63],
 58: [56, 60, 125, 6],
 59: [64, 65, 53, 55, 56, 63],
 60: [1, 125, 6, 58, 156, 61, 126],
 61: [1, 156, 60],
 62: [114, 53, 54, 55],
 63: [64, 65, 115, 57, 59, 124],
 64: [57, 59, 53, 63],
 65: [56, 59, 124, 63],
 66: [127],
 67: [69, 70, 76, 78, 79],
 68: [71, 72, 73, 76, 78],
 69: [67, 75, 27, 76, 79, 31],
 70: [146, 67, 75, 77, 78, 79],
 71: [169, 68, 72, 73, 74],
 72: [68, 164, 71, 169, 78],
 73: [68, 38, 39, 71, 41, 74, 76, 29, 31],
 74: [169, 38, 39, 168, 73, 71],
 75: [193, 69, 70, 108, 45, 46, 79, 146, 27, 28],
 76: [67, 68, 69, 73, 78, 31],
 77: [78, 146, 70],
 78: [67, 68, 164, 70, 72, 76, 77, 146],
 79: [75, 67, 69, 70],
 80: [103, 136, 81, 82, 83, 84, 85, 86],
 81: [80, 82, 84, 45],
 82: [80, 81, 85, 42, 45],
 83: [80, 99, 85, 102, 136],
 84: [80, 81, 45, 86],
 85: [98, 101, 102, 42, 43, 44, 47, 80, 82, 51, 83],
 86: [80, 84, 103, 108, 45],
 87: [96, 97, 88, 89, 92, 94],
 88: [96, 87, 151, 152, 94],
 89: [96, 97, 87, 11, 91, 92],
 90: [16, 19, 93, 9, 91, 92, 13],
 91: [16, 89, 90, 11, 92],
 92: [194, 142, 176, 87, 184, 89, 90, 91, 93, 94, 95],
 93: [19, 22, 119, 184, 90, 92],
 94: [87, 151, 153, 88, 92, 95],
 95: [192, 194, 153, 92, 94],
 96: [129, 97, 135, 11, 87, 88, 89, 152],
 97: [96, 89, 87],
 98: [101, 100, 85, 102],
 99: [136, 83, 131, 102],
 100: [98, 36, 101, 47],
 101: [98, 100, 85, 47],
 102: [83, 98, 99, 85],
 103: [80, 86, 136, 108, 104, 111],
 104: [103, 136, 106, 108, 111],
 105: [153, 106, 107, 108, 109],
 106: [108, 133, 136, 105, 104, 109, 110],
 107: [153, 151, 105, 109, 110],
 108: [193, 103, 104, 105, 106, 75, 45, 86, 153],
 109: [105, 106, 107, 110],
 110: [130, 132, 133, 151, 106, 107, 109],
 111: [136, 104, 103],
 112: [113, 123, 116, 117],
 113: [112, 117, 118, 57, 122],
 114: [0, 180, 55, 54, 119, 120, 62],
 115: [57, 124, 118, 63],
 116: [112, 121, 123, 117],
 117: [112, 113, 116, 54, 120, 121, 122],
 118: [113, 115, 57],
 119: [114, 180, 21, 22, 184, 121, 120, 93, 25],
 120: [114, 117, 54, 119, 121],
 121: [116, 21, 117, 119, 24, 123, 120],
 122: [113, 117, 53, 54, 57],
 123: [112, 116, 21, 24, 121],
 124: [65, 115, 56, 125, 126, 63],
 125: [60, 56, 58, 124, 126],
 126: [124, 161, 156, 157, 154, 60, 125],
 127: [66, 130, 132, 133, 131],
 128: [17, 11, 20, 134],
 129: [96, 20, 134, 135, 11],
 130: [132, 133, 110, 127],
 131: [136, 99, 133, 127],
 132: [130, 134, 151, 110, 127],
 133: [130, 131, 136, 106, 110, 127],
 134: [128, 129, 132, 20, 135, 151],
 135: [96, 129, 134, 151, 152],
 136: [99, 131, 133, 103, 104, 106, 111, 80, 83],
 137: [144, 139, 140, 141],
 138: [143, 179, 183, 139, 141, 175],
 139: [137, 138, 141, 143],
 140: [144, 137, 141],
 141: [192, 183, 137, 138, 139, 140, 142],
 142: [192, 176, 194, 181, 183, 92, 141],
 143: [2, 3, 138, 139, 175],
 144: [137, 140],
 145: [193, 146, 147, 164, 173],
 146: [193, 145, 164, 70, 75, 77, 78],
 147: [145, 195, 193],
 148: [162, 149, 166, 150, 40, 186, 187],
 149: [34, 162, 148, 40, 172],
 150: [40, 187, 148],
 151: [132, 134, 135, 107, 110, 88, 153, 152, 94],
 152: [96, 151, 88, 135],
 153: [192, 193, 105, 107, 108, 151, 94, 95],
 154: [161, 126, 155, 157, 158, 159],
 155: [154, 156, 157, 159],
 156: [157, 155, 60, 61, 126],
 157: [154, 155, 156, 126],
 158: [160, 161, 154, 159],
 159: [160, 158, 154, 155, 189, 190],
 160: [158, 190, 159],
 161: [154, 126, 158],
 162: [148, 149, 166, 167, 172],
 163: [164, 167, 168, 169, 171, 172],
 164: [163, 72, 169, 171, 173, 78, 145, 146],
 165: [174, 166],
 166: [162, 148, 165, 186, 188],
 167: [162, 163, 170, 171, 172],
 168: [163, 38, 169, 74, 172],
 169: [163, 164, 71, 72, 74, 168],
 170: [167],
 171: [163, 164, 173, 167],
 172: [34, 162, 163, 149, 38, 167, 168],
 173: [145, 171, 164],
 174: [165],
 175: [2, 5, 138, 143, 177, 178, 179, 182],
 176: [177, 178, 180, 181, 184, 92, 142],
 177: [176, 178, 179, 181, 175],
 178: [176, 177, 180, 5, 175],
 179: [177, 181, 183, 138, 175],
 180: [0, 176, 178, 114, 5, 119, 184],
 181: [176, 177, 179, 183, 142],
 182: [0, 2, 5, 175],
 183: [179, 181, 138, 141, 142],
 184: [176, 180, 119, 92, 93],
 185: [186, 187, 188],
 186: [148, 166, 185, 187, 188],
 187: [148, 150, 185, 186, 188],
 188: [185, 186, 187, 166],
 189: [159, 190, 191],
 190: [160, 189, 159],
 191: [189],
 192: [193, 194, 153, 141, 142, 95],
 193: [192, 195, 75, 108, 145, 146, 147, 153],
 194: [192, 92, 142, 95],
 195: [193, 147]}
In [1084]:
pd.DataFrame(*w_queen.full()).astype(int) # 1 means both are neighbors
Out[1084]:
0 1 2 3 4 5 6 7 8 9 ... 186 187 188 189 190 191 192 193 194 195
0 0 0 1 0 1 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 0 1 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 0 1 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3 0 1 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 1 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
191 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
192 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 1 0
193 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 1
194 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
195 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0

196 rows × 196 columns

In [1086]:
w_queen.pct_nonzero
Out[1086]:
2.7332361516034984
In [1088]:
w_queen.islands
Out[1088]:
[]
In [1090]:
w_queen.transform = 'R'
In [1092]:
pd.DataFrame(*w_queen.full()).sum(axis=1) # 1 means both are neighbors
Out[1092]:
0      1.0
1      1.0
2      1.0
3      1.0
4      1.0
      ... 
191    1.0
192    1.0
193    1.0
194    1.0
195    1.0
Length: 196, dtype: float64
In [1094]:
from esda.moran import Moran

moranCOVID = Moran(covid_provYear_Alarm_map['Año_2020_qt'], w_queen)
moranCOVID.I,moranCOVID.p_sim
Out[1094]:
(0.10863623693995704, 0.007)
In [1096]:
from splot.esda import moran_scatterplot
import matplotlib.pyplot as plt

fig, ax = moran_scatterplot(moranCOVID)
ax.set_xlabel('Covid_criterio_share')
ax.set_ylabel('SpatialLag_Covid_criterio_share')
Out[1096]:
Text(0, 0.5, 'SpatialLag_Covid_criterio_share')
No description has been provided for this image
In [1104]:
# The scatterplot with local info

from esda.moran import Moran_Local

# calculate Moran_Local and plot
lisaCOVID = Moran_Local(y=covid_provYear_Alarm_map['Año_2020_qt'], w=w_knn,seed=2022)
fig, ax = moran_scatterplot(lisaCOVID,p=0.05)
ax.set_xlabel('Covid_criterios_share')
ax.set_ylabel('SpatialLag_Covid_criterios_share');
No description has been provided for this image
In [1106]:
from splot.esda import plot_local_autocorrelation
plot_local_autocorrelation(lisaCOVID, covid_provYear_Alarm_map,'Año_2020_qt')
plt.show()
No description has been provided for this image
In [1108]:
# the map with the spots and outliers

from splot.esda import lisa_cluster
f, ax = plt.subplots(1, figsize=(12, 12))
plt.title('Spots and Outliers')
fig = lisa_cluster(lisaCOVID,
                   covid_provYear_Alarm_map,ax=ax,
                   legend_kwds={'loc': 'center left',
                                'bbox_to_anchor': (0.7, 0.6)})
No description has been provided for this image
In [1110]:
lisaCOVID.q
Out[1110]:
array([4, 1, 1, 1, 1, 2, 1, 3, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 2,
       4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 2, 3, 3, 3, 3, 3, 4, 4, 3, 4, 3,
       3, 1, 1, 4, 2, 4, 4, 3, 3, 1, 1, 1, 4, 1, 2, 1, 1, 1, 1, 1, 1, 2,
       4, 1, 2, 4, 1, 2, 4, 4, 4, 1, 3, 1, 4, 1, 1, 1, 1, 1, 1, 4, 2, 4,
       3, 4, 3, 4, 4, 3, 4, 1, 3, 3, 1, 1, 4, 4, 1, 2, 4, 1, 2, 3, 1, 1,
       1, 1, 1, 1, 2, 1, 4, 1, 1, 4, 1, 3, 1, 4, 1, 1, 1, 2, 4, 4, 3, 1,
       1, 2, 2, 3, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 4, 3, 4, 3, 4, 4, 4, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 4, 2, 4, 4, 4, 2, 4, 3, 1, 1, 4, 1, 4, 1,
       4, 4, 4, 2, 4, 3, 1, 1, 4, 4, 3, 3, 3, 1, 1, 1, 1, 1, 1, 2])
In [1112]:
lisaCOVID.p_sim
Out[1112]:
array([0.194, 0.039, 0.403, 0.037, 0.451, 0.156, 0.173, 0.141, 0.107,
       0.016, 0.105, 0.03 , 0.032, 0.008, 0.119, 0.027, 0.082, 0.026,
       0.004, 0.045, 0.25 , 0.465, 0.002, 0.074, 0.118, 0.06 , 0.143,
       0.459, 0.09 , 0.322, 0.181, 0.444, 0.303, 0.123, 0.1  , 0.118,
       0.461, 0.437, 0.256, 0.09 , 0.012, 0.274, 0.245, 0.101, 0.332,
       0.112, 0.495, 0.066, 0.466, 0.141, 0.017, 0.186, 0.273, 0.035,
       0.084, 0.433, 0.424, 0.033, 0.352, 0.473, 0.298, 0.146, 0.324,
       0.22 , 0.262, 0.079, 0.458, 0.5  , 0.357, 0.397, 0.214, 0.208,
       0.419, 0.143, 0.352, 0.067, 0.206, 0.361, 0.354, 0.444, 0.282,
       0.239, 0.107, 0.16 , 0.138, 0.34 , 0.262, 0.162, 0.27 , 0.003,
       0.01 , 0.011, 0.165, 0.095, 0.12 , 0.162, 0.083, 0.085, 0.197,
       0.113, 0.361, 0.338, 0.057, 0.382, 0.463, 0.265, 0.467, 0.326,
       0.356, 0.405, 0.486, 0.437, 0.002, 0.034, 0.427, 0.274, 0.194,
       0.002, 0.016, 0.011, 0.453, 0.196, 0.002, 0.355, 0.269, 0.421,
       0.153, 0.481, 0.066, 0.017, 0.407, 0.302, 0.344, 0.486, 0.451,
       0.078, 0.388, 0.022, 0.199, 0.03 , 0.074, 0.267, 0.15 , 0.009,
       0.021, 0.41 , 0.427, 0.415, 0.004, 0.414, 0.004, 0.114, 0.013,
       0.498, 0.216, 0.141, 0.273, 0.245, 0.156, 0.119, 0.177, 0.266,
       0.154, 0.255, 0.38 , 0.048, 0.058, 0.156, 0.298, 0.427, 0.19 ,
       0.226, 0.338, 0.351, 0.173, 0.457, 0.432, 0.371, 0.086, 0.479,
       0.306, 0.322, 0.333, 0.241, 0.003, 0.008, 0.028, 0.024, 0.027,
       0.146, 0.183, 0.178, 0.018, 0.338, 0.195, 0.355])
In [1114]:
pd.Series(lisaCOVID.q).value_counts()
Out[1114]:
1    77
4    61
3    37
2    21
Name: count, dtype: int64
In [1116]:
covid_provYear_Alarm_map['COVID_quadrant']=[l if p <0.05 else 0 for l,p in zip(lisaCOVID.q,lisaCOVID.p_sim)  ]
covid_provYear_Alarm_map['COVID_quadrant'].value_counts()
Out[1116]:
COVID_quadrant
0    157
4     17
1     14
3      8
Name: count, dtype: int64
In [1118]:
labels = [ '0 no_sig', '1 hotSpot', '2 coldOutlier', '3 coldSpot', '4 hotOutlier']

covid_provYear_Alarm_map['COVID_quadrant_names']=[labels[i] for i in covid_provYear_Alarm_map['COVID_quadrant']]

covid_provYear_Alarm_map['COVID_quadrant_names'].value_counts()
Out[1118]:
COVID_quadrant_names
0 no_sig        157
4 hotOutlier     17
1 hotSpot        14
3 coldSpot        8
Name: count, dtype: int64
In [1120]:
from matplotlib import colors
myColMap = colors.ListedColormap([ 'ghostwhite', 'red', 'green', 'black','orange'])




f, ax = plt.subplots(1, figsize=(12,12))


plt.title('Spots and Outliers')

covid_provYear_Alarm_map.plot(column='COVID_quadrant_names',
                categorical=True,
                cmap=myColMap,
                linewidth=0.1,
                edgecolor='white',
                legend=True,
                legend_kwds={'loc': 'center left',
                             'bbox_to_anchor': (0.7, 0.6)},
                ax=ax)
# Remove axis
ax.set_axis_off()
# Display the map
plt.show()
No description has been provided for this image
In [1136]:
covid_provYear_Alarm_map.explore("COVID_quadrant_names", categorical=True,tooltip='location',cmap=myColMap)
Out[1136]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [1130]:
print(map1.columns)
Index(['OBJECTID', 'DEPARTAMEN', 'location', 'Año2020', 'Año2021', 'Año2022',
       'Año2023', 'Año2024', 'geometry', 'PROVINCIA', 'Año_2021_qt',
       'Año_2020_qt', 'COVID_quadrant', 'COVID_quadrant_names'],
      dtype='object')
In [1132]:
print(map1.shape, map2.shape, map3.shape, map4.shape)
(14, 14) (0, 14) (8, 14) (17, 14)
In [1134]:
import folium

m = map1.explore(
    color="red",
    tooltip="location",
    popup="location",
    name="hotSpot"
)

if not map2.empty:
    map2.explore(
        m=m,
        color="green",
        tooltip="location",
        popup="location",
        name="coldOutlier"
    )

if not map3.empty:
    map3.explore(
        m=m,
        color="black",
        tooltip="location",
        popup="location",
        name="coldSpot"
    )

if not map4.empty:
    map4.explore(
        m=m,
        color="orange",
        tooltip="location",
        popup="location",
        name="hotOutlier"
    )

folium.TileLayer("CartoDB positron", show=False).add_to(m)
folium.LayerControl(collapsed=True).add_to(m)

m  # mostrar el mapa
Out[1134]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [1138]:
#from esda.moran import Moran_BV, Moran_Local_BV
from esda.moran import Moran_BV

mbi = Moran_BV(covid_provYear_Alarm_map['Año2020'],  covid_provYear_Alarm_map['Año2021'],  w_queen)
mbi.I,mbi.p_sim
Out[1138]:
(0.05662157744151253, 0.083)
In [1144]:
# The scatterplot with local info
from esda.moran import Moran_Local_BV

# calculate Moran_Local and plot
lisaCOVID_bv = Moran_Local_BV(y=covid_provYear_Alarm_map['Año2020'],
                               x=covid_provYear_Alarm_map['Año2021'],
                               w=w_queen)

fig, ax = moran_scatterplot(lisaCOVID_bv, p=0.05,aspect_equal=True)

ax.set_xlabel('Covid_2021')
ax.set_ylabel('SpatialLag_Covid_2020')
plt.show()
No description has been provided for this image
In [1146]:
covid_provYear_Alarm_map['COVID_quadrant_20_21']=[l if p <0.05 else 0 for l,p in zip(lisaCOVID_bv.q,lisaCOVID_bv.p_sim)  ]

labels = [ '0 no_sig', '1 hotSpot', '2 coldOutlier', '3 coldSpot', '4 hotOutlier']

covid_provYear_Alarm_map['COVID_quadrant_20_21_names']=[labels[i] for i in covid_provYear_Alarm_map['COVID_quadrant_20_21']]
In [1148]:
covid_provYear_Alarm_map
Out[1148]:
OBJECTID DEPARTAMEN location Año2020 Año2021 Año2022 Año2023 Año2024 geometry PROVINCIA Año_2021_qt Año_2020_qt COVID_quadrant COVID_quadrant_names COVID_quadrant_20_21 COVID_quadrant_20_21_names
0 1.0 AMAZONAS AMAZONAS+CHACHAPOYAS 0.157895 0.161290 0.285714 0.0 0.0 POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... CHACHAPOYAS 0.321971 -0.100543 0 0 no_sig 0 0 no_sig
1 2.0 AMAZONAS AMAZONAS+BAGUA 0.250000 0.153846 0.052632 0.0 0.0 POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... BAGUA 0.264291 0.389414 1 1 hotSpot 1 1 hotSpot
2 3.0 AMAZONAS AMAZONAS+BONGARA 0.250000 0.166667 0.000000 0.0 0.0 POLYGON ((-77.72759 -5.14030, -77.72361 -5.140... BONGARA 0.403108 0.389414 0 0 no_sig 1 1 hotSpot
3 4.0 AMAZONAS AMAZONAS+CONDORCANQUI 0.500000 0.250000 0.000000 0.0 0.0 POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... CONDORCANQUI 0.870846 1.690622 1 1 hotSpot 0 0 no_sig
4 5.0 AMAZONAS AMAZONAS+LUYA 0.142857 0.153846 0.000000 0.0 0.0 POLYGON ((-78.13023 -5.90370, -78.13011 -5.904... LUYA 0.264291 -0.255962 0 0 no_sig 0 0 no_sig
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
191 192.0 TUMBES TUMBES+ZARUMILLA 0.306122 0.156250 0.000000 0.0 0.0 POLYGON ((-80.28521 -3.41276, -80.28406 -3.412... ZARUMILLA 0.294604 0.794809 0 0 no_sig 0 0 no_sig
192 193.0 UCAYALI UCAYALI+CORONELPORTILLO 0.283002 0.374046 0.000000 0.0 0.0 POLYGON ((-74.47145 -7.27617, -74.47052 -7.277... CORONEL PORTILLO 1.398625 0.651161 1 1 hotSpot 1 1 hotSpot
193 194.0 UCAYALI UCAYALI+ATALAYA 0.562500 0.210526 0.000000 0.0 0.0 POLYGON ((-73.18146 -9.41174, -73.13475 -9.411... ATALAYA 0.679350 2.057244 0 0 no_sig 0 0 no_sig
194 195.0 UCAYALI UCAYALI+PADREABAD 0.441860 0.388889 0.333333 0.0 0.0 POLYGON ((-75.43663 -8.22999, -75.43651 -8.230... PADRE ABAD 1.578089 1.367739 0 0 no_sig 0 0 no_sig
195 196.0 UCAYALI UCAYALI+PURUS 0.000000 0.000000 0.000000 0.0 0.0 POLYGON ((-70.61380 -9.87339, -70.62140 -9.878... PURUS -5.199338 -5.199338 0 0 no_sig 0 0 no_sig

196 rows × 16 columns

In [1150]:
from matplotlib import colors
myColMap = colors.ListedColormap([ 'ghostwhite', 'red', 'green', 'black','orange'])




f, ax = plt.subplots(1, figsize=(12,12))


plt.title('Spots and Outliers')

covid_provYear_Alarm_map.plot(column='COVID_quadrant_20_21_names',
                categorical=True,
                cmap=myColMap,
                linewidth=0.1,
                edgecolor='white',
                legend=True,
                legend_kwds={'loc': 'center left',
                             'bbox_to_anchor': (0.7, 0.6)},
                ax=ax)
# Remove axis
ax.set_axis_off()
# Display the map
plt.show()
No description has been provided for this image
In [ ]: